/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.util.ArrayList;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.NoViableAltException;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeAdaptor;
import org.antlr.runtime.tree.TreeAdaptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.Context;

/**
 * ParseDriver is the entry point for translating a HiveQL string into an
 * abstract syntax tree of {@link ASTNode}s. The heavy lifting is done by
 * {@code HiveLexer} and {@code HiveParser}, which ANTLR generates from the
 * HiveLexer.g and HiveParser.g grammar files: new keywords are declared in
 * HiveLexer.g, new syntax in HiveParser.g.
 */
public class ParseDriver {

    // Logger category kept as the historical string name (not ParseDriver.class)
    // so existing logging configuration continues to match.
    private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver");

    /**
     * ANTLRNoCaseStringStream.
     *
     * Case-insensitive character stream for ANTLR's lexical analysis. LA(),
     * the lookahead function used when matching lexical rules, upper-cases
     * every character so the grammar only has to match upper-case letters
     * instead of every upper/lower-case combination. The actual token text is
     * produced by consume() in the ANTLRStringStream superclass and is stored
     * exactly as the user typed it, without case conversion. Consequently,
     * tools that lack this stream (e.g. ANTLRWorks) will only accept
     * capitalized tokens when running the grammar directly.
     */
    public class ANTLRNoCaseStringStream extends ANTLRStringStream {

        public ANTLRNoCaseStringStream(String input) {
            super(input);
        }

        @Override
        public int LA(int i) {
            int returnChar = super.LA(i);
            // Pass EOF and NUL through untouched; upper-case everything else
            // purely for rule matching (stored token text is unaffected).
            if (returnChar == CharStream.EOF || returnChar == 0) {
                return returnChar;
            }
            return Character.toUpperCase((char) returnChar);
        }
    }

    /**
     * HiveLexerX.
     *
     * Lexer subclass that collects recognition errors in a list instead of
     * printing them, so callers can surface them as a {@link ParseException}.
     */
    public class HiveLexerX extends HiveLexer {

        private final ArrayList<ParseError> errors;

        public HiveLexerX() {
            super();
            errors = new ArrayList<>();
        }

        public HiveLexerX(CharStream input) {
            super(input);
            errors = new ArrayList<>();
        }

        /** Records the error for later retrieval instead of writing to stderr. */
        @Override
        public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
            errors.add(new ParseError(this, e, tokenNames));
        }

        @Override
        public String getErrorMessage(RecognitionException e, String[] tokenNames) {
            if (e instanceof NoViableAltException) {
                // For development, the message can additionally include
                // nvae.grammarDecisionDescription, nvae.decisionNumber and
                // nvae.stateNumber from the NoViableAltException.
                return "character " + getCharErrorDisplay(e.c) + " not supported here";
            }
            return super.getErrorMessage(e, tokenNames);
        }

        public ArrayList<ParseError> getErrors() {
            return errors;
        }

    }

    /**
     * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
     * so that the graph walking algorithms and the rules framework defined in
     * ql.lib can be used with the AST Nodes.
     */
    public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
        /**
         * Creates an ASTNode for the given token. The ASTNode is a wrapper around
         * antlr's CommonTree class that implements the Node interface.
         *
         * @param payload
         *          The token.
         * @return Object (which is actually an ASTNode) for the token.
         */
        @Override
        public Object create(Token payload) {
            return new ASTNode(payload);
        }

        @Override
        public Object dupNode(Object t) {
            return create(((CommonTree) t).token);
        }

        @Override
        public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
            return new ASTErrorNode(input, start, stop, e);
        }
    };

    public ASTNode parse(String command) throws ParseException {
        return parse(command, null);
    }

    public ASTNode parse(String command, Context ctx) throws ParseException {
        return parse(command, ctx, null);
    }

    /**
     * Parses a command, optionally assigning the parser's token stream to the given context.
     *
     * The pipeline is: (1) lexical analysis turns the command string into a
     * token stream ({@link TokenRewriteStream}) via the ANTLR-generated,
     * case-insensitive {@link HiveLexerX}; (2) syntactic analysis turns the
     * token stream into an AST via the ANTLR-generated {@code HiveParser},
     * whose tree adaptor is set so the result is built from {@link ASTNode}s.
     *
     * @param command command to parse
     * @param ctx     context with which to associate this parser's token stream, or
     *                null if either no context is available or the context already has
     *                an existing stream
     * @param viewFullyQualifiedName fully qualified name of the view being
     *                expanded, or null for a top-level query
     * @return parsed AST
     * @throws ParseException if the lexer or parser reports any errors
     */
    public ASTNode parse(String command, Context ctx, String viewFullyQualifiedName) throws ParseException {
        LOG.debug("Parsing command: {}", command);

        // Lexical analysis: characters -> tokens, matched case-insensitively.
        HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
        TokenRewriteStream tokens = new TokenRewriteStream(lexer);

        if(ctx != null) {
            if(viewFullyQualifiedName == null) {
                // Top level query
                ctx.setTokenRewriteStream(tokens);
            } else {
                // It is a view
                ctx.addViewTokenRewriteStream(viewFullyQualifiedName, tokens);
            }
            lexer.setHiveConf(ctx.getConf());
        }

        // Syntactic analysis: tokens -> AST. HiveParser is generated by ANTLR
        // from HiveParser.g.
        HiveParser parser = new HiveParser(tokens);
        if(ctx != null) {
            parser.setHiveConf(ctx.getConf());
        }
        parser.setTreeAdaptor(adaptor);

        HiveParser.statement_return r;
        try {
            r = parser.statement();
        } catch(RecognitionException e) {
            // The failure is surfaced through parser.errors; keep the original
            // exception at debug level for troubleshooting instead of dumping
            // the stack trace to stderr.
            LOG.debug("RecognitionException parsing command", e);
            throw new ParseException(parser.errors);
        }

        throwOnErrors(lexer, parser.errors);
        LOG.debug("Parse Completed");

        ASTNode tree = (ASTNode) r.getTree();
        tree.setUnknownTokenBoundaries();
        if(LOG.isDebugEnabled()) {
            // Guarded: dump() materializes the whole tree, which can be large.
            LOG.debug("Parse tree: {}", tree.dump());
        }
        return tree;
    }

    /*
     * Parse a string as a query hint.
     */
    public ASTNode parseHint(String command) throws ParseException {
        LOG.info("Parsing hint: {}", command);

        HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
        TokenRewriteStream tokens = new TokenRewriteStream(lexer);
        HintParser parser = new HintParser(tokens);
        parser.setTreeAdaptor(adaptor);
        HintParser.hint_return r;
        try {
            r = parser.hint();
        } catch(RecognitionException e) {
            LOG.debug("RecognitionException parsing hint", e);
            throw new ParseException(parser.errors);
        }

        throwOnErrors(lexer, parser.errors);
        LOG.info("Parse Completed");

        return (ASTNode) r.getTree();
    }

    /*
     * parse a String as a Select List. This allows table functions to be passed expression Strings
     * that are translated in
     * the context they define at invocation time. Currently used by NPath to allow users to specify
     * what output they want.
     * NPath allows expressions n 'tpath' a column that represents the matched set of rows. This
     * column doesn't exist in
     * the input schema and hence the Result Expression cannot be analyzed by the regular Hive
     * translation process.
     */
    public ASTNode parseSelect(String command, Context ctx) throws ParseException {
        LOG.debug("Parsing command: {}", command);

        HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
        TokenRewriteStream tokens = new TokenRewriteStream(lexer);
        if(ctx != null) {
            ctx.setTokenRewriteStream(tokens);
        }
        HiveParser parser = new HiveParser(tokens);
        parser.setTreeAdaptor(adaptor);
        HiveParser_SelectClauseParser.selectClause_return r;
        try {
            r = parser.selectClause();
        } catch(RecognitionException e) {
            LOG.debug("RecognitionException parsing select clause", e);
            throw new ParseException(parser.errors);
        }

        throwOnErrors(lexer, parser.errors);
        LOG.debug("Parse Completed");

        return (ASTNode) r.getTree();
    }

    /**
     * Parses a string as a standalone expression (e.g. a filter predicate).
     *
     * @param command expression text to parse
     * @return parsed AST for the expression
     * @throws ParseException if the lexer or parser reports any errors
     */
    public ASTNode parseExpression(String command) throws ParseException {
        LOG.info("Parsing expression: {}", command);

        HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
        TokenRewriteStream tokens = new TokenRewriteStream(lexer);
        HiveParser parser = new HiveParser(tokens);
        parser.setTreeAdaptor(adaptor);
        HiveParser_IdentifiersParser.expression_return r;
        try {
            r = parser.expression();
        } catch(RecognitionException e) {
            LOG.debug("RecognitionException parsing expression", e);
            throw new ParseException(parser.errors);
        }

        throwOnErrors(lexer, parser.errors);
        LOG.info("Parse Completed");

        return (ASTNode) r.getTree();
    }

    /**
     * Throws a {@link ParseException} if the lexer or parser accumulated any
     * errors. Lexer errors take precedence, matching the historical check
     * order of the parse methods.
     *
     * @param lexer        lexer whose collected errors are inspected first
     * @param parserErrors errors collected by the parser
     * @throws ParseException carrying the first non-empty error list
     */
    private void throwOnErrors(HiveLexerX lexer, ArrayList<ParseError> parserErrors) throws ParseException {
        if(lexer.getErrors().size() != 0) {
            throw new ParseException(lexer.getErrors());
        }
        if(parserErrors.size() != 0) {
            throw new ParseException(parserErrors);
        }
    }
}
